import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# 1. Load the raw, full event table.
df = pd.read_excel("Both Data.xlsx", engine="openpyxl")

# 2. Parse the MonthYear column (YYYYMM) into a monthly Period.
# A column that Excel delivers as float (which happens as soon as one cell is
# blank) would stringify as "202001.0"; '%Y%m' rejects that and
# errors='coerce' would then silently turn *every* row into NaT.  Going
# through a nullable integer first keeps the six-digit form.  Values that are
# missing or not whole numbers still end up as NaT and are dropped by the
# groupby below.
month_num = pd.to_numeric(df['MonthYear'], errors='coerce')
month_num = month_num.where(month_num % 1 == 0)
month_digits = month_num.astype('Int64').astype(str)
df['Month'] = pd.to_datetime(month_digits, format='%Y%m', errors='coerce').dt.to_period('M')

# 3. Flag directed events: CT = actor1 CHN -> actor2 TWN, TC = the reverse.
is_ct = (df['Actor1CountryCode'] == 'CHN') & (df['Actor2CountryCode'] == 'TWN')
is_tc = (df['Actor1CountryCode'] == 'TWN') & (df['Actor2CountryCode'] == 'CHN')

# 4. Aggregate per month: event counts, mention counts and the
# mention-weighted sums of GoldsteinScale and AvgTone.
# Per-row helper columns followed by one vectorised groupby-sum replace the
# former groupby.apply(lambda ...): counts stay integers, an empty input still
# yields all expected columns, and no Python callback runs per group.
mentions = df['NumMentions']
per_row = pd.DataFrame({
    'Month':         df['Month'],
    'CT_Events':     is_ct.astype('int64'),
    'TC_Events':     is_tc.astype('int64'),
    'TotalMentions': mentions,
    'CT_Mentions':   mentions.where(is_ct, 0),
    'TC_Mentions':   mentions.where(is_tc, 0),
    'ImpactRaw':     df['GoldsteinScale'] * mentions,
    'ToneRaw':       df['AvgTone'] * mentions,
    # Weights restricted to rows where the weighted value is present, so a
    # missing score does not dilute the weighted mean toward zero.
    'ImpactWeight':  mentions.where(df['GoldsteinScale'].notna()),
    'ToneWeight':    mentions.where(df['AvgTone'].notna()),
})
by_month = per_row.groupby('Month')
monthly = by_month.sum()
monthly.insert(0, 'TotalEvents', by_month.size())

# 5. Turn the weighted sums into weighted means and rescale them with
# (x + 10) / 20, which maps a value in [-10, 10] onto [0, 1] (inputs outside
# that range produce indices outside [0, 1]).  Months without any usable
# weight get NaN.
impact_weight = monthly['ImpactWeight'].where(monthly['ImpactWeight'] > 0)
tone_weight = monthly['ToneWeight'].where(monthly['ToneWeight'] > 0)
monthly['ImpactIndex'] = (monthly['ImpactRaw'] / impact_weight + 10) / 20
monthly['ToneIndex'] = (monthly['ToneRaw'] / tone_weight + 10) / 20

# 6. GlobalAttention = log(1 + total mentions); BalanceIndex = share of CT
# events among all events of the month (0.5 if a month had no events).
monthly['GlobalAttention'] = np.log1p(monthly['TotalMentions'])
monthly['BalanceIndex'] = np.where(
    monthly['TotalEvents'] > 0,
    monthly['CT_Events'] / monthly['TotalEvents'],
    0.5
)

# 7. Export the result; Month_dt is the month as a timestamp so that Excel
# and matplotlib can handle it.  Only the columns listed in `cols` are written.
out = monthly.reset_index()
out['Month_dt'] = out['Month'].dt.to_timestamp()
cols = [
    'Month_dt',
    'TotalEvents', 'CT_Events', 'TC_Events',
    'TotalMentions', 'CT_Mentions', 'TC_Mentions',
    'GlobalAttention', 'BalanceIndex', 'ImpactIndex', 'ToneIndex'
]
out.to_excel("Monthly_Indices_Full.xlsx", index=False, columns=cols)
print("已保存：Monthly_Indices_Full.xlsx")

# 8. 绘图（可选）
def plot(col, title, ylabel, fname, color):
    """Save a line chart of one monthly index to an image file.

    The data comes from the module-level ``out`` frame: ``out[col]`` is drawn
    against ``out['Month_dt']`` with a major tick every six months.

    Args:
        col: Name of the column in ``out`` to draw.
        title: Title shown above the chart.
        ylabel: Label of the y axis.
        fname: Target path handed to ``Figure.savefig``.
        color: Matplotlib colour used for the line and its markers.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(out['Month_dt'], out[col], '-o', color=color)
        ax.set_title(title)
        ax.set_xlabel("Month")
        ax.set_ylabel(ylabel)
        ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
        # Rotate through the Axes API instead of pyplot's implicit
        # "current axes" so the setting is tied to this figure.
        ax.tick_params(axis='x', labelrotation=45)
        # Clamp the x axis to the data range only when there is a real range;
        # with zero or one month the limits would be NaT or identical.
        first_month = out['Month_dt'].min()
        last_month = out['Month_dt'].max()
        if first_month < last_month:
            ax.set_xlim(first_month, last_month)
        fig.tight_layout()
        fig.savefig(fname)
    finally:
        # Always release the figure, even if drawing or saving failed;
        # pyplot otherwise keeps it alive in its global figure registry.
        plt.close(fig)

# One chart per index: (column, title, y-axis label, output file, colour).
for chart_spec in (
    ('GlobalAttention', "Global Media Attention", "log(1+TotalMentions)", "GlobalAttention.png", "blue"),
    ('BalanceIndex', "Coverage Balance (Events)", "CT_Events/TotalEvents", "BalanceIndex.png", "orange"),
    ('ImpactIndex', "Normalized Impact Index", "ImpactIndex [0-1]", "ImpactIndex.png", "green"),
    ('ToneIndex', "Normalized Tone Index", "ToneIndex [0-1]", "ToneIndex.png", "red"),
):
    plot(*chart_spec)

# Report the files written by the loop above.
print("生成图像：GlobalAttention.png, BalanceIndex.png, ImpactIndex.png, ToneIndex.png")
